

global root_dir = "`1'"

include "$root_dir/code/config/config.do"


cap noi log using ${log_dir}/merge_all_data_wages.log, replace name(dat)

*Handle empty arguments
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
	global wtype "$arg4"
}
di "${wtype}"
capture noi {

set more off

*This do file combines all the imported data and acts as a bit lof a filter for what we actually want to keep.

*A. Merge all data (for more information look into the load_data* files) 
*A.1. EUklems
*A.2. WIOD
*A.3. Exchange rates
*A.4 GDP data

*B. Variables to keep

*C. Create share total value added. 

*D. Merge with Swiss data (added separately - see "import_swiss_data.do")

***********************************************
********** A. MERGE ALL LOADED DATA  **********
***********************************************
*A.0. Load the countries file.
import excel ${mow_data_raw}/countries/countries.xlsx, sheet("Tabelle1") firstrow allstring clear
tempfile countries
save `countries', replace

* A.1. Take basic EUklems 
use  ${mow_data_proc}/EUklems_import.dta, replace
replace country="gbr" if country=="uk"
drop if year==0

* There are double Canada data for the manufacturing sector. 
drop if ls_wage==. & code=="D" & country=="can"

* A.2. Merge with WIOD data
merge 1:1 country year code using ${mow_data_proc}/WIOD_import.dta

drop _merge
replace country="uk" if country=="gbr"




* A.3. Merge with exchange rate data. 
merge m:1 country year using ${mow_data_proc}/UN_ER.dta

* Drop West Germany (dew). But first, use the data from West Germany before reunification.
* Overlap in 1991: scale such that West Germany and Germany-Germany have the same ls_wage ms_wage and hs_wage in 
* 1991. 
tempfile WIOD_ER
save `WIOD_ER', replace
keep if (country=="dew" | country=="ger")
sort year code country

foreach wage_var in ls_wage ms_wage hs_wage lms_wage {
	gen scale_2=.
	replace scale_2=`wage_var'/`wage_var'[_n+1] if year==1991 & country=="dew"
	bysort country code: egen scale_tot=sum(scale_2) 
	replace `wage_var'=`wage_var'/scale_tot if country=="dew"
	sort year code country
	replace `wage_var'=`wage_var'[_n-1] if country=="ger" & `wage_var'==. & year<1991
	drop scale_2 scale_tot
}
	gen keep_german=1
append using `WIOD_ER'
* drop the original set of Germanys
drop if keep_german==. & (country=="deu" | country=="ger")
* and drop West Germany (dew). 
drop if _merge==1											
drop _merge keep_german

* Rename to match with minimum wage data later on.
replace country="usa" if country=="usa-sic"
replace country="gbr" if country=="uk"
replace country="deu" if country=="ger"

* Delete auxilary data we don't need.
merge m:1 country using `countries'
keep if _merge==3
drop _merge
sort country code year

* A.5. Merge with GDP data. 
merge m:1 country year using ${mow_data_proc}/GDP.dta
drop if _merge==2
drop _merge

***********************************************
********** B. VARIABLES TO KEEP ***************
***********************************************

* We keep total manufacturing, total industries and individual manufacturing industries.
gen keepit=0
replace keepit=1 if inlist(code,"TOT","D","15t16","15","16","17t19","17t18","17","18")
replace keepit=1 if inlist(code,"19","20","21t22","21","22","221","22x","23t25","23")
replace keepit=1 if inlist(code,"24","244","24x","25","26","27t28","27","28","29")
replace keepit=1 if inlist(code,"30t33","30","31t32","31","313","31x","32","321")
replace keepit=1 if inlist(code,"322","323","33","331t3","334t5")
replace keepit=1 if inlist(code,"34t35","34","35","351","353","35x","36t37","36","37")

*We also have to keep some observations which have no code in order to keep the GDP data
*for some countries between 1990 and 1994. 
replace keepit=1 if inlist(code, "")
keep if keepit==1
drop keepit


*Replace where we have no code with TOT and also duplicate them with D to have GDP
*data for the years before 1994 for all countries. 
expand 2 if code==""
sort year country code 
quietly by year country code:  gen dup = cond(_N==1,0,_n)
replace code="TOT" if dup==1
replace code="D" if dup==2
drop dup

* Create the last variables that we need and remove the variables that we don't need
* Generate average wage and VA_P for EUklems. 
gen av_wage=LAB/H_EMP
gen VA_P=VA/VA_QI  
sort country code year

*Renormalize to 1995 to get VA_P. 
by country code: egen mgo=total(VA_P) if year==1995
by country code: egen mgo2=total(mgo) 
replace VA_P=VA_P/mgo2*100
drop mgo mgo2

 
* There is no variable for VA_P from EUklems for Canada in 2005, but there is for 2004. We use the 
* WIOD growth rate to inpute it.

gen VA_P_temp = .
replace VA_P_temp = VA_P if country == "can" & year == 2004 & code == "TOT"
summarize VA_P_temp, meanonly
replace VA_P_temp = r(mean) if country == "can" & year == 2005

gen VA_P_WIOD_temp = .
replace VA_P_WIOD_temp = VA_P_WIOD if country == "can" & year == 2004 & code == "TOT"
summarize VA_P_WIOD_temp, meanonly
replace VA_P_WIOD_temp = r(mean) if country == "can" & year == 2005

*for total
replace VA_P=(VA_P_temp*100)/VA_P_WIOD_temp if country=="can" & year==2005 & code=="TOT" 

*reuse The variable, as we take from the same orriginal variable, just different sector
replace VA_P_WIOD_temp = .
replace VA_P_WIOD_temp = VA_P_WIOD if country == "can" & year == 2004 & code == "D"
summarize VA_P_WIOD_temp, meanonly
replace VA_P_WIOD_temp = r(mean) if country == "can" & year == 2005

*for manufacturing
replace VA_P=(VA_P[_n-1]*100)/VA_P_WIOD_temp if country=="can" & year==2005 & code=="D"

drop VA_P_temp VA_P_WIOD_temp
sort country code year

* Renomralize to 2005. 
by country code: egen mgo=total(VA_P) if year==2005
by country code: egen mgo2=total(mgo)
replace VA_P=VA_P/mgo2*100
drop mgo*


*Renormalize the VA_QI too. 
sort country code year
by country code: egen mgo=total(VA_QI) if year==2005
by country code: egen mgo2=total(mgo)
replace VA_QI=VA_QI/mgo2*100
drop mgo*


* Only keep important variables
keep country year code desc av_wage* VA_P* va_emp* lms_wage* ls_wage* ms_wage* hs_wage* exchangerate  VA VA_WIOD VA_QI  H_LS* H_MS* H_HS* LABLS* LABMS* LABHS* GDP* pop cap* log* def_GDP_own_05 def_GDP_own_95 

label var av_wage "Average wage"
label var VA_P "real VA"

tempfile wage_combined
save `wage_combined', replace

***********************************************************************
****C. Create share of total value added that's manufacturing D/TOT ***
***********************************************************************


* This will only be for industries that already exist, ie. we can't do what we do for wages and equate wages in industry 15 and 16 
* with that from 15t16. We don't know the split.

keep year country code VA*
gen VA_temp=VA

*WIOD data from 1995 on. 
replace VA_temp=VA_WIOD if year>=1995

bysort country year: egen VA_temp_TOT=sum(VA_temp) if code=="TOT"
by country year: egen VA_temp_TOT_2=sum(VA_temp_TOT)

*create the sectors share of total value added.
gen code_share=VA_temp/VA_temp_TOT_2
clonevar code_VA = VA_temp
keep year country code code_share code_VA

*merge back to the original data.
merge 1:1 country code year using `wage_combined', nogenerate

save ${mow_data_proc}/wage_combined.dta, replace

***********************************************
********D. ADD SWISS DATA   *******************
*********************************************** 
* Include swiss data from various sources. We will treat it as if it's from WIOD.
* Note, these are wage data. We multiply all by 1.2 to reflect total labor costs,
* which during the period 2000-2010 was persistently close to the ratio of the two. 

use ${mow_data_proc}/swiss_import.dta

*Add the GDP data for Switzerland. 
merge m:1 year country using ${mow_data_proc}/GDP.dta
drop if _merge==2
drop _merge

*Add the code_share variable like above. 
gen VA_temp=VA
sort country year
bysort country year: egen VA_temp_TOT=sum(VA_temp) if code=="TOT"
by country year: egen VA_temp_TOT_2=sum(VA_temp_TOT)
gen code_share=VA_temp/VA_temp_TOT_2
clonevar code_VA = VA_temp
drop VA_temp VA_temp_TOT VA_temp_TOT_2

tempfile swiss
save `swiss', replace

*Merge Swiss and other data. 
use ${mow_data_proc}/wage_combined.dta, clear
append using `swiss'
duplicates drop

*overwrite the wage_combined file.
sleep 1500
save ${mow_data_proc}/wage_combined.dta, replace

}
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

cap log close dat